{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bfddff72",
   "metadata": {},
   "source": [
    "# 神经网络基础\n",
    "\n",
    "视频：https://www.bilibili.com/video/BV1AK4y1P7vs\n",
    "\n",
    "5章：https://zh-v2.d2l.ai/chapter_deep-learning-computation/index.html\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2fd6ee3c",
   "metadata": {},
   "source": [
    "# 一、模型构造"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4be7c889",
   "metadata": {},
   "source": [
    "## 1.1多层感知机定义\n",
    "\n",
    "```\n",
    "网络 = nn.Sequential(层, 激活函数, 层)\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7e7f6f1c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.0711, -0.0966,  0.3948, -0.0431, -0.0378, -0.1735,  0.0194, -0.1121,\n",
       "         -0.2351, -0.1535],\n",
       "        [-0.0433, -0.0640,  0.3070, -0.0714,  0.0027, -0.3321, -0.1279, -0.0256,\n",
       "         -0.1638, -0.0984]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "from torch.nn import functional as F #定义了没有参数的函数\n",
    "\n",
    "net = nn.Sequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))\n",
    "\n",
    "X = torch.rand(2,20)\n",
    "net(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bae87226",
   "metadata": {},
   "source": [
    "## 1.2自定义块\n",
    "```\n",
    "class 网络(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.层名 = 层()\n",
    "    \n",
    "    def forward(self, X):\n",
    "        X = self.层名(X)\n",
    "        X = 激活函数(X)\n",
    "    \n",
    "    return X\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "69efe389",
   "metadata": {},
   "outputs": [],
   "source": [
    "class MLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.hidden = nn.Linear(20, 256)\n",
    "        self.out = nn.Linear(256, 10)\n",
    "        \n",
    "    def forward(self, X):\n",
    "        X = self.hidden(X)\n",
    "        X = F.relu(X)\n",
    "        X = self.out(X)\n",
    "        return X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0159ad88",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.1092,  0.0554,  0.0318, -0.1181, -0.0251,  0.1486, -0.0087,  0.0135,\n",
       "          0.1106,  0.2357],\n",
       "        [ 0.0996, -0.0487,  0.0395, -0.1800,  0.0966,  0.2124,  0.0480, -0.3080,\n",
       "          0.1353,  0.1662]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = MLP()\n",
    "net(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42f9006d",
   "metadata": {},
   "source": [
    "## 1.3顺序块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "786eaa9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "class MySequential(nn.Module):\n",
    "    def __init__(self, *args):\n",
    "        super().__init__()\n",
    "        for block in args:\n",
    "            self._modules[block] = block\n",
    "            \n",
    "    def forward(self, X):\n",
    "        for block in self._modules.values():\n",
    "            X = block(X)\n",
    "        return X\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8c71469e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.0132,  0.0638,  0.0390,  0.0499,  0.1299,  0.0576, -0.1571,  0.0073,\n",
       "         -0.1448, -0.0544],\n",
       "        [ 0.0395,  0.2598, -0.0891,  0.1879,  0.2393,  0.0194, -0.1408,  0.0040,\n",
       "         -0.1866, -0.0431]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = MySequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))\n",
    "net(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed89adad",
   "metadata": {},
   "source": [
    "# 二、参数管理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d7a89bce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0.5972],\n",
       "        [0.4275]], grad_fn=<AddmmBackward>)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "\n",
    "net = nn.Sequential(nn.Linear(4,8), nn.ReLU(), nn.Linear(8,1))\n",
    "X = torch.rand(size=(2,4))\n",
    "net(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cdc9d90e",
   "metadata": {},
   "source": [
    "## 2.1访问参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "38c3a7a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([('weight', tensor([[ 0.2188, -0.2106, -0.2614,  0.1184, -0.0556, -0.2155,  0.3012, -0.0851]])), ('bias', tensor([0.1198]))])\n"
     ]
    }
   ],
   "source": [
    "# 得到nn.Linear(8,1)的权重参数\n",
    "\n",
    "print(net[2].state_dict())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "599686f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameter containing:\n",
      "tensor([0.1198], requires_grad=True)\n",
      "tensor([0.1198])\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "print(net[2].bias)\n",
    "print(net[2].bias.data) # 参数\n",
    "print(net[2].bias.grad) # 梯度"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e1728b2",
   "metadata": {},
   "source": [
    "## 2.2参数初始化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "96ecc19d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([('weight', tensor([[ 0.0042, -0.0042, -0.0145, -0.0134, -0.0028,  0.0024,  0.0099, -0.0116]])), ('bias', tensor([0.]))])\n",
      "Parameter containing:\n",
      "tensor([0.], requires_grad=True)\n"
     ]
    }
   ],
   "source": [
    "# 用内置的初始化器。下面的代码将所有权重参数初始化为标准差为0.01的高斯随机变量，且将偏置参数设置为0\n",
    "def init_normal(m):\n",
    "    if type(m) == nn.Linear:\n",
    "        nn.init.normal_(m.weight, mean=0, std=0.01)\n",
    "        nn.init.zeros_(m.bias)\n",
    "        \n",
    "net.apply(init_normal)\n",
    "\n",
    "print(net[2].state_dict()) # 权重\n",
    "\n",
    "print(net[2].bias) # 参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "9473cbc4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([42.0000,  0.9958,  0.9855,  0.9866,  0.9972,  1.0024,  1.0099,  0.9884])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 直接设置参数\n",
    "net[2].weight.data[:] += 1\n",
    "net[2].weight.data[0, 0] = 42\n",
    "\n",
    "net[2].weight.data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27ff5229",
   "metadata": {},
   "source": [
    "# 三、加载保存模型参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "a0a061c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "class MLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.hidden = nn.Linear(20, 256)\n",
    "        self.output = nn.Linear(256, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.output(F.relu(self.hidden(x)))\n",
    "\n",
    "net = MLP()\n",
    "X = torch.randn(size=(2, 20))\n",
    "Y = net(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96be461f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存参数\n",
    "torch.save(net.state_dict(), 'mlp.params')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f9c7e53",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载参数\n",
    "clone = MLP()\n",
    "clone.load_state_dict(torch.load('mlp.params'))\n",
    "clone.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6b252d44",
   "metadata": {},
   "source": [
    "# 四、训练模型"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5ab59f71",
   "metadata": {},
   "source": [
    "## 4.1定义损失函数和优化器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a8bc2c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "#反向传播算法 SGD Adam等\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)\n",
    "#均方损失函数\n",
    "criterion =\ttorch.nn.MSELoss()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ea6b313",
   "metadata": {},
   "source": [
    "## 4.2训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6830de60",
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.autograd import Variable\n",
    "\n",
    "EPOCH = 3 # 训练轮数\n",
    "\n",
    "#记录用于绘图\n",
    "losses = []#记录每次迭代后训练的loss\n",
    "eval_losses = []#测试的\n",
    "\n",
    "# 自定义训练方法\n",
    "\n",
    "for epoch in range(EPOCH):  # 训练轮数\n",
    "    # 遍历训练集每条数据，进行训练，得到每轮损失loss\n",
    "    running_loss = 0.0\n",
    "    for step, (x, y) in enumerate(train_loader):   #【enumerate()枚举对象 得到格式（id，元素）】\n",
    "        b_x = Variable(x) # 数据x\n",
    "        b_y = Variable(y) # 标签y\n",
    "    \n",
    "        output = model_2(b_x) # 把数据输入进网络\n",
    "        loss = loss_func(output, b_y) # 损失\n",
    "        running_loss += loss.item() # 损失累加\n",
    "        \n",
    "        optimizer.zero_grad() # 梯度置零\n",
    "        loss.backward()  # loss反向传播\n",
    "        optimizer.step() # 反向传播后参数更新\n",
    "        \n",
    "    losses.append(running_loss/len(train_loader))    #记录改论平均损失，用于后续画图\n",
    "    print('训练轮数：', epoch, ' 训练平均损失loss：',running_loss/len(train_loader)) #平均损失=每轮每条训练数据损失求和/每轮训练数据数\n",
    "    \n",
    "    \n",
    "    \n",
    "    # 计算测试集每轮训练后损失val_loss\n",
    "    running_loss = 0.0\n",
    "    for step, (x, y) in enumerate(test_loader): \n",
    "        b_x = Variable(x) # 数据x\n",
    "        b_y = Variable(y) # 标签y\n",
    "        \n",
    "        output = model_2(b_x) # 把数据输入进网络\n",
    "        loss = loss_func(output, b_y) # 损失\n",
    "        running_loss += loss.item() # 损失累加\n",
    "    \n",
    "    eval_losses.append(running_loss/len(test_loader))    #记录改论平均损失，用于后续画图\n",
    "    print('训练轮数：', epoch, ' 测试平均损失val_loss：',running_loss/len(test_loader)) #平均损失=每轮每条训练数据损失求和/每轮训练数据数\n",
    "    \n",
    "        \n",
    "    \n",
    "print('end') \n",
    "    "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
